import numpy as np
import pandas as pd
import matplotlib as  mpl
import matplotlib.pyplot as  plt
import seaborn as sns

# Allow up to 500 columns in printed DataFrames so the preview below is not
# truncated for wide tables.
pd.set_option('display.max_columns', 500)

# Load the input table. Forward slashes are used so the relative path resolves
# on both Windows and POSIX systems (a backslash path is Windows-only).
data = pd.read_csv('../taobao/data/taobao_1.csv')
print(data.head())   # first rows, as a quick sanity check of the load
print(data.shape)    # (row count, column count)

# Data analysis
# Daily traffic metrics: one row per distinct value of the '日期' (date) column.
# pv_daily is the number of non-null '用户名' (username) entries per date.
# The column is selected before aggregating so only that column is counted,
# instead of counting every column and discarding all but one.
pv_daily = data.groupby('日期')['用户名'].count().rename('pv_daily')
# uv_daily is the number of distinct usernames per date.
uv_daily = data.groupby('日期')['用户名'].nunique().rename('uv_daily')
# Both Series share the date index, so they align side by side as two columns.
pv_uv_daily = pd.concat([pv_daily, uv_daily], axis=1)
# Forward slashes keep the output path portable across Windows and POSIX.
# utf_8_sig prepends a UTF-8 BOM (presumably so spreadsheet tools detect the
# encoding of the non-ASCII index label; confirm with the consumers of the file).
pv_uv_daily.to_csv('../taobao/data/pv_uv_daily.csv', encoding="utf_8_sig")

# Intraday traffic metrics: one row per distinct value of the '小时' (hour) column.
# pv_hour is the number of non-null '用户名' (username) entries per hour.
# The column is selected before aggregating so only that column is counted,
# instead of counting every column and discarding all but one.
pv_hour = data.groupby('小时')['用户名'].count().rename('pv_hour')
# uv_hour is the number of distinct usernames per hour.
uv_hour = data.groupby('小时')['用户名'].nunique().rename('uv_hour')
# Both Series share the hour index, so they align side by side as two columns.
pv_uv_hour = pd.concat([pv_hour, uv_hour], axis=1)
# Forward slashes keep the output path portable across Windows and POSIX.
# utf_8_sig prepends a UTF-8 BOM (presumably so spreadsheet tools detect the
# encoding of the non-ASCII index label; confirm with the consumers of the file).
pv_uv_hour.to_csv('../taobao/data/pv_uv_hour.csv', encoding="utf_8_sig")
